#!/usr/bin/Rscript
##########################################################################################
# Social Media and Political Agenda Setting
##########################################################################################
# Description
##########################################################################################
# Differences in Responsiveness Figure 4
##########################################################################################
# Contents
##########################################################################################
# 1) Dependencies
# 2) Data Import
# 3) Modified IRF functions form package 'vars'
# 4) Transform Results from IRF from logit to percentage
# 5) Main
# 6) Data Aggregation for Figure 4
# 7) Figure 4
##########################################################################################
# 1) Dependencies
##########################################################################################
library(dplyr)
library(ggplot2)
library(stringr)
library(forcats)
library(showtext)
library(MASS)
library(vars)
library(boot)
library(parallel)
library(doParallel)
library(foreach)
library(iterators)
##########################################################################################
# 2) Data Import
##########################################################################################
rm(list = ls())
# - set dir
args = commandArgs()

scriptName = args[substr(args,1,7) == '--file=']

if (length(scriptName) == 0) {
  scriptName <- rstudioapi::getSourceEditorContext()$path
} else {
  scriptName <- substr(scriptName, 8, nchar(scriptName))
}

pathName = substr(
  scriptName, 
  1, 
  nchar(scriptName) - nchar(strsplit(scriptName, '.*[/|\\]')[[1]][2])
)

setwd(pathName)
parent_path <- getwd()

# - load fonts used in plots
font_add_google("Montserrat", "Montserrat")
font_add_google("Roboto", "Roboto")

# - define a global seed (used in all scripts)
set.seed(2019)

# - load auto regression models
final_input <- readRDS("../var/var_model_all_small.RDS")

# - topic names for model selection
mod_name_nice <- c("Environment", "Gender", "Immigration", "Europe", "All Four")

# - load ddl theme
suppressMessages(suppressWarnings(source('../ggplot_theme_ddl.R')))
#################################################################################
# 3) Modified IRF functions form package 'vars'
#################################################################################
irf.varest.mod <-
  function(x, impulse=NULL, response=NULL, n.ahead=10, ortho=TRUE, cumulative=TRUE, boot=TRUE, ci=0.95, runs=100, seed=2019, ...){
    if(!(class(x)=="varest")){
      stop("\nPlease provide an object of class 'varest', generated by 'VAR()'.\n")
    }
    y.names <- colnames(x$y)
    if(is.null(impulse)){
      impulse <- y.names
    } else {
      impulse <- as.vector(as.character(impulse))
      if(any(!(impulse %in% y.names))) {
        stop("\nPlease provide variables names in impulse\nthat are in the set of endogenous variables.\n")
      }
      impulse <- subset(y.names, subset = y.names %in% impulse)
    }
    if(is.null(response)){
      response <- y.names
    } else {
      response <- as.vector(as.character(response))
      if(any(!(response %in% y.names))){
        stop("\nPlease provide variables names in response\nthat are in the set of endogenous variables.\n")
      }
      response <- subset(y.names, subset = y.names %in% response)
    }
    ## Getting the irf
    irs <- .irf(x = x, impulse = impulse, response = response, y.names = y.names, n.ahead = n.ahead, ortho = ortho, cumulative = cumulative)
    ## Bootstrapping
    Lower <- NULL
    Upper <- NULL
    if(boot){
      ci <- as.numeric(ci)
      if((ci <= 0)|(ci >= 1)){
        stop("\nPlease provide a number between 0 and 1 for the confidence interval.\n")
      }
      ci <- 1 - ci
      # returns all simulated Values instead of just the lower and upper ci
      BOOT <- .boot.mod(x = x, n.ahead = n.ahead, runs = runs, ortho = ortho, cumulative = cumulative, impulse = impulse, response = response, ci = ci, seed = seed, y.names = y.names)
      Lower <- BOOT$Lower
      Upper <- BOOT$Upper
      All <- BOOT$All
    }
    result <- list(irf=irs, Lower=Lower, Upper=Upper, All=All, response=response, impulse=impulse, ortho=ortho, cumulative=cumulative, runs=runs, ci=ci, boot=boot, model = class(x))
    class(result) <- "varirf"
    return(result)
  }

.irf <-
  function(x, impulse, response, y.names, n.ahead, ortho, cumulative){
    if((class(x) == "varest") || (class(x) == "vec2var")){
      if(ortho){
        irf <- Psi(x, nstep = n.ahead)
      } else {
        irf <- Phi(x, nstep = n.ahead)
      }
    } else if((class(x) == "svarest") || (class(x) == "svecest")){
      irf <- Phi(x, nstep = n.ahead)
    }
    dimnames(irf) <- list(y.names, y.names, NULL)
    idx <- length(impulse)
    irs <- list()
    for(i in 1 : idx){
      irs[[i]] <- matrix(t(irf[response , impulse[i], 1 : (n.ahead + 1)]), nrow = n.ahead+1)
      colnames(irs[[i]]) <- response
      if(cumulative){
        if(length(response) > 1) irs[[i]] <- apply(irs[[i]], 2, cumsum)
        if(length(response) == 1){
          tmp <- matrix(cumsum(irs[[1]]))
          colnames(tmp) <- response
          irs[[1]] <- tmp
        }
      }
    }
    names(irs) <- impulse
    result <- irs
    return(result)
  }

.boot.mod <-
  function(x, n.ahead, runs, ortho, cumulative, impulse, response, ci, seed, y.names){
    if(!(is.null(seed))) set.seed(abs(as.integer(seed)))
    if(class(x) == "varest"){
      VAR <- eval.parent(x)
    } else {
      stop("Bootstrap not implemented for this class.\n")
    }
    p <- VAR$p
    K <- VAR$K
    obs <- VAR$obs
    total <- VAR$totobs
    type <- VAR$type
    B <- Bcoef(VAR)
    BOOT <- vector("list", runs)
    ysampled <- matrix(0, nrow = total, ncol = K)
    colnames(ysampled) <- colnames(VAR$y)
    Zdet <- NULL
    if(ncol(VAR$datamat) > (K * (p+1))){
      Zdet <- as.matrix(VAR$datamat[, (K * (p + 1) + 1):ncol(VAR$datamat)])
    }
    resorig <- scale(resid(VAR), scale = FALSE)
    B <- Bcoef(VAR)
    for(i in 1:runs){
      booted <- sample(c(1 : obs), replace=TRUE)
      resid <- resorig[booted, ]
      lasty <- c(t(VAR$y[p : 1, ]))
      ysampled[c(1 : p), ] <- VAR$y[c(1 : p), ]
      for(j in 1 : obs){
        lasty <- lasty[1 : (K * p)]
        Z <- c(lasty, Zdet[j, ])
        ysampled[j + p, ] <- B %*% Z + resid[j, ]
        lasty <- c(ysampled[j + p, ], lasty)
      }
      varboot <- update(VAR, y = ysampled)
      if(class(x) == "svarest"){
        varboot <- update(x, x = varboot)
      }
      BOOT[[i]] <- .irf(x = varboot, n.ahead = n.ahead, ortho = ortho, cumulative = cumulative, impulse = impulse, response = response, y.names=y.names)
    }
    runner <- runs
    lower <- ci / 2
    upper <- 1 - ci / 2
    mat.l <- matrix(NA, nrow = n.ahead + 1, ncol = length(response))
    mat.s <- array(NA, dim = c(n.ahead + 1, length(response), runs))
    mat.u <- matrix(NA, nrow = n.ahead + 1, ncol = length(response))
    Lower <- list()
    All <- list()
    Upper <- list()
    idx1 <- length(impulse)
    idx2 <- length(response)
    idx3 <- n.ahead + 1
    temp <- rep(NA, runs)
    for(j in 1 : idx1){
      for(m in 1 : idx2){
        for(l in 1 : idx3){
          for(i in 1 : runs){
            if(idx2 > 1){
              temp[i] <- BOOT[[i]][[j]][l, m]
            } else {
              temp[i] <- matrix(BOOT[[i]][[j]])[l, m]
            }
          }
          mat.l[l, m] <- quantile(temp, lower, na.rm = TRUE)
          mat.s[l, m, ] <- temp
          mat.u[l, m] <- quantile(temp, upper, na.rm = TRUE)
        }
      }
      colnames(mat.l) <- response
      colnames(mat.s) <- response
      colnames(mat.u) <- response
      Lower[[j]] <- mat.l
      All[[j]] <- mat.s
      Upper[[j]] <- mat.u
    }
    names(Lower) <- impulse
    names(Upper) <- impulse
    names(All) <- impulse
    result <- list(Lower = Lower, Upper = Upper, All = All)
    return(result)
  }
#################################################################################
# 4) Transform Results from IRF from logit to percenteage
#################################################################################
for(k in 1:4){
  irf_temp <- irf.varest.mod(final_input[[k]], n.ahead = 60, cumulative = TRUE, runs = 1000)
  
  # - a list with the elements of interest from the IRF object
  elements_to_pull <- c("All","irf")
  variables <- names(irf_temp$irf)
  
  # - initializing and filling a dataset with the IRF info
  irf_data_tmp <- NULL
  sim_data_tmp <- NULL
  
  for (el in elements_to_pull) {
    new_irf_info <- irf_temp[el][[1]]
    if(el == "All"){
      for (out in variables){
        # - this is an array (iterate through all simulations in dim 3)
        new_sim_var_data <- new_irf_info[[out]]
        sim_n_data_tmp <- NULL
        for(sim_n in 1:dim(new_sim_var_data)[3]){
          # - take inverse logit to transform the effects to percentage point changes
          new_sim_n_var_data_transf <- as.data.frame(
            sapply(1:ncol(new_sim_var_data[,,sim_n]), function(j)
              inv.logit(new_sim_var_data[,j,sim_n]) - 0.5))
          
          colnames(new_sim_n_var_data_transf) <- colnames(new_sim_var_data)
          new_sim_n_var_data_long <- new_sim_n_var_data_transf %>% tidyr::gather(cov, value)
          new_sim_n_var_data_long$out <- out
          new_sim_n_var_data_long$sim <- sim_n
          new_sim_n_var_data_long$day <- rep(1:nrow(new_sim_var_data),length(unique(new_sim_n_var_data_long$cov)))
          new_sim_n_var_data_long$e_type <- el
          sim_n_data_tmp <- rbind(sim_n_data_tmp, new_sim_n_var_data_long)
        }
        sim_data_tmp <- rbind(sim_data_tmp,sim_n_data_tmp)
        sim_max <- sim_n
      }
      
    } else {
      for (out in variables) {
        new_irf_var_data <- as.data.frame(new_irf_info[out][[1]])
        irf_n_data_tmp <- NULL
        for(sim_n in 1:sim_max){
          # - take inverse logit to transform the effects to percentage point changes
          new_irf_n_var_data_transf <- as.data.frame(
            sapply(1:ncol(new_irf_var_data), function(j)
              inv.logit(new_irf_var_data[,j]) - 0.5))
          
          colnames(new_irf_n_var_data_transf) <- colnames(new_irf_var_data)
          new_irf_n_var_data_long <- new_irf_n_var_data_transf %>% tidyr::gather(cov, value)
          new_irf_n_var_data_long$out <- out
          new_irf_n_var_data_long$sim <- sim_n
          new_irf_n_var_data_long$day <- rep(1:nrow(new_irf_var_data),length(unique(new_irf_n_var_data_long$cov)))
          new_irf_n_var_data_long$e_type <- el
          irf_n_data_tmp <- rbind(irf_n_data_tmp, new_irf_n_var_data_long)
        }
        irf_data_tmp <- rbind(irf_data_tmp,irf_n_data_tmp)
      }
    }
    irf_data_t <- rbind(irf_data_tmp, sim_data_tmp)
  }
  # - give easier labels to the estimate types
  irf_data_t$e_type <- recode(irf_data_t$e_type,
                              `irf` = "pe",
                              `All` = "sim_pe")
  if(k == 1){
    irf_data <- list(irf_data_t)
  } else {
    irf_data[[k]] <- irf_data_t
  }
  rm(irf_data_t, irf_data_tmp, sim_data_tmp, new_irf_n_var_data_transf, new_irf_n_var_data_long, 
     new_irf_var_data, new_sim_n_var_data_long, new_sim_n_var_data_transf, new_sim_var_data,
     irf_temp, new_irf_info)
  gc()
}
#################################################################################
# 5) Main
#################################################################################
for(l in 1:length(irf_data)){
  cat("Model ", l, " is now beeing calculated!\n")
  # - a vector with the name of the variables
  variables <- unique(irf_data[[l]]$cov)
  
  # - a vector with the simulations
  simlength <- unique(irf_data[[l]]$sim)
  simlength <- max(simlength)
  
  # - deciding the number of days to simulate
  DAYS <- 60
  
  new_irf_data <- data.frame(matrix(NA, ncol = 7, nrow = sim_max * 120))
  
  tmpirf <- irf_data[[l]]
  
  irf_data_tmp_all <- tmpirf %>%
    filter(day <= (DAYS + 1))
  
  no_cores <- 4 #detectCores() - 11
  # create the cluster for caret to use
  cl <- parallel::makeCluster(no_cores, setup_strategy = "sequential")
  registerDoParallel(cl)
  
new_irf_data <- foreach::foreach(sim_n = 1:simlength, .combine = "rbind", .packages = c("tcltk", "dplyr", "MASS", "boot", "forcats", "vars")) %dopar% {
    if(!exists("pb")) pb <- tkProgressBar("Parallel task", min=1, max=simlength)
    setTkProgressBar(pb, sim_n)
    
    irf_data_tmp <- irf_data_tmp_all %>% dplyr::filter(sim == sim_n)
    # - iterating through covariates
    for (covariate in variables) {
      # -iterating through outcomes
      for (outcome in variables) {
        # - skipping when covariate and response are the same
        if (covariate != outcome) {
          # - initializing a cumulative-shocks matrix for this scenario: two 2-dim 
          #     matrix, one matrix for the covariate and one matrix for the response,
          #     and one dimension for the point estimate and the one other dimension
          #     for the simulated values of the estimate of this simulation number 
          cov_mat <- array(0, dim = c(DAYS, DAYS, 2))
          out_mat <- array(0, dim = c(DAYS, DAYS, 2))
          
          # - pull the full 15-day IRFs for the endogenous covariate
          cov_resp <- irf_data_tmp %>%
            filter(cov == covariate, out == covariate) %>%
            # - remove 1st row: it's part of the set-up (repsonse at day 0)
            filter(day != 1) %>%
            mutate(day = day -1)
          
          # - pull the full 15-day IRFs for the particular outcome variable
          out_resp <- irf_data_tmp %>%
            filter(cov == covariate, out == outcome) %>%
            # - remove 1st row: it's part of the set-up (repsonse at day 0)
            filter(day != 1) %>%
            mutate(day = day -1)
          
          # - transforming the 15-day IRFs for the covariate and outcome to a wide
          #   2-column format (one column per estimate type: pe, pe_sim)
          or_cov_resp <- cov_resp %>%
            dplyr::select(day, value, e_type) %>%
            tidyr::spread(e_type, value) %>%
            dplyr::select(-day)
          
          or_out_resp <- out_resp %>%
            dplyr::select(day, value, e_type) %>%
            tidyr::spread(e_type, value) %>%
            dplyr::select(-day)
          
          # - fill up the first rows of the scenario matrices with the original 
          #   1-day shock responses
          cov_mat[1,,1:2] <- or_cov_resp %>%
            as.matrix()
          out_mat[1,,1:2] <- or_out_resp %>%
            as.matrix()
          
          for (i in 2:DAYS) {
            # - iterating through the rest of the 15 days, beyond the 1st one
            # - chekcing first how much attention the covariate group is predicted 
            #   to pay to the issue in day i-1
            cov_att_pe <- sum(cov_mat[,(i-1),1])
            
            # - calculating how big a new shock needs to be in order for the 
            #   covariate group to keep its attention to 100%
            cov_new_shock <- 1 - cov_att_pe
            
            # - re-scaling the original 100 percentage point shock to the new shock
            cov_new_resp <- or_cov_resp[1:(DAYS-(i-1)),] * cov_new_shock
            out_new_resp <- or_out_resp[1:(DAYS-(i-1)),] * cov_new_shock
            
            # - adding the response to this new shock to the scenario matrices
            cov_mat[i,i:DAYS,1:2] <- cov_new_resp %>%
              as.matrix()
            out_mat[i,i:DAYS,1:2] <- out_new_resp %>%
              as.matrix()
          }
          # - saving the output for this cov --> out 
          new_rows <- rbind(
            data.frame(
              cov = covariate,
              value = colSums(out_mat[,,1]),
              out = outcome,
              sim = sim_n,
              day = 1:DAYS,
              e_type = "pe",
              data_type = "structural"),
            data.frame(
              cov = covariate,
              value = colSums(out_mat[,,2]),
              out = outcome,
              sim = sim_n,
              day = 1:DAYS,
              e_type = "sim_pe",
              data_type = "structural")
          )
          
          rows_i <- nrow(new_rows)
          if(sim_n == 1){
            min_i <- 1
            max_i <- rows_i
          } else {
            min_i <- nrow(new_irf_data) + 1
            max_i <- nrow(new_irf_data) + rows_i
          }
          new_irf_data[min_i:max_i,] <- new_rows
          return(new_irf_data)
        }
      }
    }
    
  }
  parallel::stopCluster(cl)
  colnames(new_irf_data) <- c("cov", "value", "out", "sim", "day", "e_type", "data_type")
  new_irf_data <- na.omit(new_irf_data)
  cat("Model ", l, " parallel part ist completed!\n")
  
  # - merging this new type of IRFs with the "regular" 1-time-shock IRFs
  tmpirf$data_type <- "one_time_shock"
  tmpirf <- tmpirf %>%
    # - correct the original data for the fact that day 1 is just pre-setting
    filter(day != 1) %>% 
    mutate(day = day -1)
  
  all_irf_data <- rbind(tmpirf, new_irf_data)
  
  # - removing from the dataset cases in which covariate and outcome are the same
  all_irf_data <- all_irf_data %>% 
    filter(cov != out)
  
  # - a wide version of the dataset, with a separate column for each estimate type
  all_irf_data_wide <- all_irf_data %>% 
    tidyr::pivot_wider(names_from = e_type, values_from = value, values_fn = list(value = list)) %>% tidyr::unchop(everything())

  
  # - simulate one-time and structural shocks of 10 instead of 100 percentage pts.
  #   Present the results in 0-100 scale instead of 0-1
  all_irf_data_wide <- all_irf_data %>%
    mutate(value = (value / 10) * 100) %>% 
    tidyr::pivot_wider(names_from = e_type, values_from = value, values_fn = list(value = list)) %>% tidyr::unchop(everything())
  
  cat("Model ", l, " data is now in wide format!\n")
  
  # - better labels for the data type
  all_irf_data_wide$data_type <- recode(
    all_irf_data_wide$data_type,
    `one_time_shock` =  "Effect of a one time 10 percentage point attention increase at day 0",
    `structural` = "Effect of a structural 10 percentage point attention increase at day 0")
  
  all_irf_data_wide <- na.omit(all_irf_data_wide)
  
  if(l == 1){
    all_irf_data_wide_all <- list(all_irf_data_wide)
  } else {
    all_irf_data_wide_all[[l]] <- all_irf_data_wide
  }
}

saveRDS(all_irf_data_wide_all,
        "../var/onetime-structural-shock-irfs-results_all_small_diff_parallel_sim.RDS")
#################################################################################
# 6) Data Aggregation for Figure 4
#################################################################################
final_input <- readRDS("../var/onetime-structural-shock-irfs-results_all_small_diff_parallel_sim.RDS")
mod_name_nice <- c("Environment", "Gender", "Immigration", "Europe", "All Four")

for(n in 1:(length(final_input))){
  tmp <- final_input[[n]]
  tmp$topic <- mod_name_nice[n]
  
  if(n == 1){
    all_topics <- tmp
  } else if(n == 4){
    all_topics <- rbind(all_topics, tmp)
    all_topics <- all_topics %>% dplyr::filter(day %in% c(1,7))
    all_topics <- all_topics %>% dplyr::filter(out %in% c("Media_SMD", "Politician_TW", "Party_TW")) %>% 
                                 dplyr::filter(cov %in% c("Media_SMD", "Politician_TW", "Party_TW"))
    
    # - relabel the outcomes so they fit/look better in the plot
    all_topics$out <- recode(all_topics$out,
                             `Media_SMD` = "Newspaper Articles",
                             `Politician_TW` = "Tweets by Politicians",
                             `Party_TW` = "Tweets by Parties")
    
    # - relabel the covariates so they fit/look better in the plot
    all_topics$cov <- recode(all_topics$cov,
                             `Media_SMD` = "Newspaper Articles",
                             `Politician_TW` = "Tweets by Politicians",
                             `Party_TW` = "Tweets by Parties")
    
  } else {
    all_topics <- rbind(all_topics, tmp)
  }
  
}

plot_df_i <- all_topics %>%
  filter(day == 7 & data_type == "Effect of a one time 10 percentage point attention increase at day 0") %>%
  mutate(cov2 = case_when(cov == "Newspaper Articles" ~ "Newspapers", cov == "Tweets by Parties" ~ "Parties", 
                          cov == "Tweets by Politicians" ~ "Politicians")) %>%
  mutate(out2 = case_when(out == "Newspaper Articles" ~ "Newspapers", out == "Tweets by Parties" ~ "Parties", 
                          out == "Tweets by Politicians" ~ "Politicians")) %>%
  mutate(direction = paste0(cov2, " -> ", out2)) %>%
  mutate(TOPIC = str_to_upper(topic)) %>%
  mutate(topic = fct_relevel(topic, "Environment", "Gender", "Europe", "Immigration")) %>%
  mutate(topic_rev = fct_relevel(topic, "Immigration", "Europe", "Gender", "Environment"))


# - calculate differences from plot_df_i
all_topics_diff <- NULL
ci <- 1- 0.95
lower_ci <- ci / 2
upper_ci <- 1 - ci / 2
ik <- 0
alldirections <- unique(plot_df_i$direction)
for(directions1 in alldirections){
  ik <- ik + 1
  notrun <- alldirections[1:ik]
  for(directions2 in alldirections){
    for(topics in unique(plot_df_i$topic_rev)){
      if(!directions2 %in% notrun){
        # - only clculate if cases are different
        if(directions1 != directions2){
          dir1 <- plot_df_i %>% dplyr::filter(direction == directions1 & topic_rev == topics)
          dir2 <- plot_df_i %>% dplyr::filter(direction == directions2 & topic_rev == topics) 
          # - only calculate if directions are right a -> b & b -> a
          if(dir1$cov2[1] == dir2$out2[1]){
            if(dir1$out2[1] == dir2$cov2[1]){
              direction <- paste0(directions1, "\nvs.\n ", directions2)
              diff_pe <- mean(dir1$pe - dir2$pe)
              range_pe <- dir1$sim_pe - dir2$sim_pe
              lower_pe <- quantile(range_pe, lower_ci, na.rm = TRUE)
              upper_pe <- quantile(range_pe, upper_ci, na.rm = TRUE)
              pe_1 <- mean(dir1$pe)
              pe_2 <- mean(dir2$pe)
              new_rows <- rbind(
                data.frame(direction = direction,
                           Topic = topics,
                           diff_pe = diff_pe,
                           lower_pe = lower_pe,
                           upper_pe = upper_pe,
                           pe_1 = pe_1,
                           pe_2 = pe_2))
              
              all_topics_diff <- rbind(all_topics_diff, new_rows)
            }
          }
        }
      }
    }
  }
}
all_topics_diff <- all_topics_diff %>% mutate(Topic = fct_relevel(Topic, "Environment", "Gender", "Europe", "Immigration")) %>%
                                       mutate(Topic_rev = fct_relevel(Topic, "Immigration", "Europe", "Gender", "Environment"))
#################################################################################
# 7) Figure 4
#################################################################################
# - make facet names
facet_names <- c(`Politicians -> Parties\nvs.\n Parties -> Politicians` = "Politicians (A) vs Parties (B) ",
                 `Politicians -> Newspapers\nvs.\n Newspapers -> Politicians` = "Politicians (A) vs Newspapers (B) ",
                 `Parties -> Newspapers\nvs.\n Newspapers -> Parties` = "Parties (A) vs Newspapers (B)")

ggplot(data = all_topics_diff) +
  aes(x = Topic_rev, y = diff_pe, ymin = lower_pe, ymax = upper_pe, group = Topic_rev, color = Topic, shape = Topic) +
  geom_linerange(size = 2.5, position = position_dodge(width = 0.55), alpha = 0.5) +  
  geom_point(position = position_dodge(width = 0.55), size = 3.5) +
  geom_hline(aes(yintercept = 0), color = "black", linetype = 2, alpha = 0.75) +
  coord_flip() +
  labs (subtitle ="", 
        title ="", 
        y = "(B leads A)      Percentage points       (A leads B)", x = "") +
  scale_y_continuous(breaks = c(-2,-1,0,1,2,3,4)) +
  scale_color_manual(labels = c("Environment","Gender","Europe","Immigration"), 
                     values = c("#009E73","#DD2461","#0072B2","#999999")) +
  scale_fill_manual(labels = c("Environment","Gender","Europe","Immigration"), 
                    values = c("#009E73","#DD2461","#0072B2","#999999")) +
  scale_shape_manual(labels = c("Environment","Gender","Europe","Immigration"), values = c(15,16,17,18)) +
  facet_wrap(~ direction,  ncol = 1, nrow = 3, scales = "free_y", labeller = as_labeller(facet_names)) + 
  ddl_theme(panel.grid.major = element_blank(),
            panel.grid.minor = element_blank()) +
  theme(legend.position = "none", legend.title = element_blank(), 
        strip.background = element_blank(), strip.text = element_text(color = "black"),
        axis.text.x = element_text(angle = 0, hjust = 0.5, vjust = 0.0, size = 16, color = "black"),  
        axis.text.y = element_text(hjust=0, size = 16, color = "black"),
        axis.ticks.y = element_blank(),
        strip.text.x = element_text(size = 16, color = "black"),
        axis.title = element_text(size = 16, color = "black"),
        plot.margin = unit(c(.5,.5,.5,.5), "cm"),
        axis.title.x = element_text(size = 16, color = "black", hjust = (3/8), 
                                    margin = margin(t = .5, r = 0, b = .5, l = 0, unit = "cm")),
        axis.title.y = element_text(size = 16, color = "black"),
        legend.text = element_text(size = 16, color = "black"),
        plot.title = element_blank(),
        legend.key.size = unit(1.5,"line"),
        legend.key = element_blank(),
        axis.line.x = element_line(color="black", size = .5),
        axis.line.y = element_line(color="black", size = .5),
        panel.spacing.y = unit(2.5, "lines"),
        panel.spacing.x = unit(.8, "lines")) +
  guides(color = guide_legend(override.aes = list(size=2)))

ggsave("../images/figure_4.pdf", width = 10, height = 7.5, device = cairo_pdf)
#################################################################################